# Timing
# Distribution summaries of the two "Page Submit" timing columns of dt_pr_cum.
# NOTE(review): dt_pr_cum is only assigned further down in this chunk; these
# two calls rely on an object created earlier in the session — confirm order.
summary(dt_pr_cum$`timing_c_treatment_Page Submit`)
summary(dt_pr_cum$`timing_m_treatment_Page Submit`)
# Keep rows of dt_pr whose StartDate (as a Date) is after 2024-03-20 and whose
# email is not missing.
dt_pr_cum = dt_pr[as.Date(dt_pr$StartDate) > as.Date("2024-03-20"), ] %>%
filter(!is.na(email))
# Ad-hoc console arithmetic; the result is printed, not stored.
895-816
# Run the fb_performance.R checks script with echo on.
# NOTE(review): debugSource is not base R (presumably supplied by the RStudio
# IDE) — confirm it is available where this is run.
debugSource("~/Dropbox/Centro Latam Digital/ISOC/misinformation_migrants/data/checks/fb_performance.R", echo=TRUE)
# Keep only the first row for each email value.
dt_pr_cum = dt_pr_cum %>% filter(!duplicated(email))
# Print the de-duplicated data.
dt_pr_cum
## Number of completes
nrow(dt_pr_cum)
debugSource("~/Dropbox/Centro Latam Digital/ISOC/misinformation_migrants/data/checks/fb_performance.R", echo = TRUE)

# Responses whose StartDate falls on 2024-04-04.
dt_pr_d <- dt_pr[as.Date(dt_pr$StartDate) == as.Date("2024-04-04"), ]

## Non-functional surveys: rows matching at least one of the listed
## screening codes.
dt_pr_d %>%
  filter(
    consent == 2 |
      adult == 2 |
      country == 2 |
      mexico == 0 |
      temporal_status == 0
  ) %>%
  nrow()

## Surveys started but not finished: every screening condition holds and no
## email was recorded.
dt_pr_d %>%
  filter(
    consent == 1,
    adult == 1,
    country != 2,
    mexico == 1,
    temporal_status != 0,
    is.na(email)
  ) %>%
  nrow()

## Effective surveys: rows with a recorded email.
dt_pr_d %>%
  filter(!is.na(email)) %>%
  nrow()
source("~/Dropbox/Centro Latam Digital/ISOC/misinformation_migrants/data/checks/fb_performance.R", echo = TRUE)

## Non-functional surveys: rows matching at least one of the listed
## screening codes.
dt_pr_d %>%
  filter(
    consent == 2 |
      adult == 2 |
      country == 2 |
      mexico == 0 |
      temporal_status == 0
  ) %>%
  nrow()

## Surveys started but not finished: every screening condition holds and no
## email was recorded.
dt_pr_d %>%
  filter(
    consent == 1,
    adult == 1,
    country != 2,
    mexico == 1,
    temporal_status != 0,
    is.na(email)
  ) %>%
  nrow()

## Effective surveys: rows with a recorded email.
dt_pr_d %>%
  filter(!is.na(email)) %>%
  nrow()

## Number of completes
nrow(dt_pr_cum)
# Number of distinct non-missing emails.
dt_pr_cum %>%
  filter(!is.na(email)) %>%
  distinct(email) %>%
  nrow()
# Keep the first row for each email value.
dt_pr_cum <- dt_pr_cum %>% distinct(email, .keep_all = TRUE)

## Attention check (1): share of rows with attention_check1 equal to 3
## (missing answers count as not equal).
round(sum(dt_pr_cum$attention_check1 == 3, na.rm = TRUE) / nrow(dt_pr_cum), digits = 3)
## Attention check (2): same share for attention_check2.
round(sum(dt_pr_cum$attention_check2 == 3, na.rm = TRUE) / nrow(dt_pr_cum), digits = 3)

### Blocks
sum(dt_pr_cum$misinformation == 1, na.rm = TRUE) # misinformation
sum(dt_pr_cum$misinformation == 0, na.rm = TRUE) # cybersecurity

## Misinformation: total over the two display-order columns of each arm.
### T0
dt_pr_cum %>%
  select(Treatment_misinformation_DO_t0_1,
         Treatment_misinformation_DO_t0_2) %>%
  colSums(na.rm = TRUE) %>%
  sum()
### T1
dt_pr_cum %>%
  select(Treatment_misinformation_DO_misinformation_t1_1,
         Treatment_misinformation_DO_misinformation_t1_2) %>%
  colSums(na.rm = TRUE) %>%
  sum()
### T2
dt_pr_cum %>%
  select(Treatment_misinformation_DO_misinformation_t2_1,
         Treatment_misinformation_DO_misinformation_t2_2) %>%
  colSums(na.rm = TRUE) %>%
  sum()

## Cybersecurity: number of rows with a non-missing display-order value.
### T0
sum(!is.na(dt_pr_cum$Treatment_cybersecurity_DO_cybersec_t0))
### T1
sum(!is.na(dt_pr_cum$Treatment_cybersecurity_DO_cybersec_t1))

# Timing
summary(dt_pr_cum$`timing_c_treatment_Page Submit`)
summary(dt_pr_cum$`timing_m_treatment_Page Submit`)
source("~/Dropbox/Centro Latam Digital/ISOC/misinformation_migrants/data/checks/fb_performance.R", echo = TRUE)

## Non-functional surveys: rows matching at least one of the listed
## screening codes.
dt_pr_d %>%
  filter(
    consent == 2 |
      adult == 2 |
      country == 2 |
      mexico == 0 |
      temporal_status == 0
  ) %>%
  nrow()

## Surveys started but not finished: every screening condition holds and no
## email was recorded.
dt_pr_d %>%
  filter(
    consent == 1,
    adult == 1,
    country != 2,
    mexico == 1,
    temporal_status != 0,
    is.na(email)
  ) %>%
  nrow()

## Effective surveys: rows with a recorded email.
dt_pr_d %>%
  filter(!is.na(email)) %>%
  nrow()

## Effective surveys (removing repeated emails)
dt_pr_d %>%
  filter(!is.na(email)) %>%
  distinct(email) %>%
  nrow()

## Number of completes
nrow(dt_pr_cum)
# Number of distinct non-missing emails.
dt_pr_cum %>%
  filter(!is.na(email)) %>%
  distinct(email) %>%
  nrow()
# Keep the first row for each email value.
dt_pr_cum <- dt_pr_cum %>% distinct(email, .keep_all = TRUE)

## Attention check (1): share of rows with attention_check1 equal to 3
## (missing answers count as not equal).
round(sum(dt_pr_cum$attention_check1 == 3, na.rm = TRUE) / nrow(dt_pr_cum), digits = 3)
## Attention check (2): same share for attention_check2.
round(sum(dt_pr_cum$attention_check2 == 3, na.rm = TRUE) / nrow(dt_pr_cum), digits = 3)

### Blocks
sum(dt_pr_cum$misinformation == 1, na.rm = TRUE) # misinformation
sum(dt_pr_cum$misinformation == 0, na.rm = TRUE) # cybersecurity

## Misinformation: total over the two display-order columns of each arm.
### T0
dt_pr_cum %>%
  select(Treatment_misinformation_DO_t0_1,
         Treatment_misinformation_DO_t0_2) %>%
  colSums(na.rm = TRUE) %>%
  sum()
### T1
dt_pr_cum %>%
  select(Treatment_misinformation_DO_misinformation_t1_1,
         Treatment_misinformation_DO_misinformation_t1_2) %>%
  colSums(na.rm = TRUE) %>%
  sum()
### T2
dt_pr_cum %>%
  select(Treatment_misinformation_DO_misinformation_t2_1,
         Treatment_misinformation_DO_misinformation_t2_2) %>%
  colSums(na.rm = TRUE) %>%
  sum()

## Cybersecurity: number of rows with a non-missing display-order value.
### T0
sum(!is.na(dt_pr_cum$Treatment_cybersecurity_DO_cybersec_t0))
### T1
sum(!is.na(dt_pr_cum$Treatment_cybersecurity_DO_cybersec_t1))

# Timing
summary(dt_pr_cum$`timing_c_treatment_Page Submit`)
summary(dt_pr_cum$`timing_m_treatment_Page Submit`)
# Ad-hoc console arithmetic; results are printed, not stored. The operands are
# not named anywhere in this file.
# Sum of the two counts.
374+211
# Half of that sum.
(374+211)/2
# Half of 0.5.
.5/2
# Grid of sample sizes (100 to 500 in steps of 100) for the power analysis.
params <- expand_grid(n = seq(100, 500, by = 100)) # left-out

# For each grid row, solve power.t.test for the detectable difference (delta)
# at 80% power with an outcome SD of 0.5, and stack the tidied results into
# one data frame with one row per grid value.
# power.t.test is called with twice the grid value and the tidied `n` is
# halved again, so the `n` column in the result equals the grid value.
# NOTE(review): power.t.test's `n` is documented as observations per group —
# confirm that doubling it here is the intended design.
pwr_params <- map_dfr(
  seq_len(nrow(params)), # seq_len() yields no iterations for an empty grid
  function(x) {
    pwr_out <- power.t.test(n = params$n[x] * 2, sd = 0.5, power = .8)
    broom::tidy(pwr_out) %>%
      mutate(n = n / 2)
  }
)
library(estimatr, quietly = TRUE)
library(foreign, quietly = TRUE)
library(ggplot2, quietly = TRUE)
library(tidyverse, quietly = TRUE)
library(reshape2, quietly = TRUE)
library(gtools, quietly = TRUE)
library(stringr, quietly = TRUE)
library(patchwork, quietly = TRUE)
library(knitr, quietly = TRUE)
library(kableExtra, quietly = TRUE)
library(CBPS, quietly = TRUE)
library(sp, quietly = TRUE) #GIS packages
library(spdep, quietly = TRUE)
library(rgdal, quietly = TRUE)
library(maps, quietly = TRUE)
library(mapdata, quietly = TRUE)
#library(maptools, quietly = TRUE)
library(ggmap, quietly = TRUE)
library(lfe, quietly = TRUE)
library(foreach, quietly = TRUE)
library(data.table, quietly = TRUE)
library(equivtest, quietly = TRUE) # equivalence analysis
library(haven, quietly = TRUE)
library(survey, quietly = TRUE)
library(powerLATE, quietly = TRUE)
# Grid of sample sizes (100 to 500 in steps of 100) for the power analysis.
params <- expand_grid(n = seq(100, 500, by = 100)) # left-out

# For each grid row, solve power.t.test for the detectable difference (delta)
# at 80% power with an outcome SD of 0.5, and stack the tidied results into
# one data frame with one row per grid value.
# power.t.test is called with twice the grid value and the tidied `n` is
# halved again, so the `n` column in the result equals the grid value.
# NOTE(review): power.t.test's `n` is documented as observations per group —
# confirm that doubling it here is the intended design.
pwr_params <- map_dfr(
  seq_len(nrow(params)), # seq_len() yields no iterations for an empty grid
  function(x) {
    pwr_out <- power.t.test(n = params$n[x] * 2, sd = 0.5, power = .8)
    broom::tidy(pwr_out) %>%
      mutate(n = n / 2)
  }
)
# Print the power-analysis table.
pwr_params

# Detectable difference against n, one panel per SD value.
ggplot(pwr_params, aes(x = n, y = delta)) +
  #mutate(sd = paste0("SD of Outcome: ", sd)) %>%
  geom_point() +
  geom_line() +
  #ylim(c(0,.2)) +
  labs(
    title = 'Power Analysis',
    x = "N per Group",
    y = "Minimum Detectable Effect"
  ) +
  facet_wrap(~sd)

# Same plot without panels and with the y axis limited to [0, 0.2].
ggplot(pwr_params, aes(x = n, y = delta)) +
  #mutate(sd = paste0("SD of Outcome: ", sd)) %>%
  geom_point() +
  geom_line() +
  ylim(0, .2) +
  labs(
    title = 'Power Analysis',
    x = "N per Group",
    y = "Minimum Detectable Effect"
  )
pacman::p_load(data.table,
tidyverse,
estimatr)
library(readxl)
# Read the survey workbook, rename its Spanish column headers to short English
# names, and derive the analysis variables in a single data.table step.
dt = read_excel("~/Downloads/Histórico_ERE_CCC_2014_2024 (Anonimizada).xlsx")
dt = dt %>% as.data.table() %>%
# setnames(old, new): the first vector lists the original headers, the second
# the replacement names; the two are matched by position.
setnames(.,
c('Identificador',
'Semestre Evaluado',
'Aplicación Encuesta',
'Sector',
'El número de trabajadores en su empresa durante el semestre frente al semestre anterior',
'¿Realizó inversiones en su empresa durante el semestre como la compra de maquinaria, equipo o ampliación de capacidad productiva?',
'¿En qué proporción aumentó el número de trabajadores?',
'¿Cuál fue el principal problema de su empresa en el semestre para el normal desarrollo de sus actividades?',
'Otro:¿Cuál fue el principal problema de su empresa en el semestre para el normal desarrollo de sus actividades?',
'El número de trabajadores en su empresa en el próximo semestre frente al actual',
'El valor total de las ventas de su empresa en el semestre actual respecto al anterior'
),
c('id',
'date_assessment',
'date_survey',
'sector',
'variation_workers',
'invested_fix_capital',
'variation_workers_proportion',
'main_problem',
'main_problem2',
'expected_increase_workers',
'expected_increase_sales'
)) %>%
# year = first four characters of date_assessment as a number;
# semester = everything from character 6 onward; id is coerced to numeric.
.[, `:=` (year=substr(date_assessment, 1,4) %>% as.numeric(),
semester=substr(date_assessment, 6,nchar(date_assessment)),
id=as.numeric(id),
# main_problem_protest: 1 when main_problem or main_problem2 matches one of
# the listed protest/strike labels, 0 otherwise (missing values also get 0).
main_problem_protest=case_when(main_problem %in% c('Protestas sectoriales',
'Dificultades de orden público durante el Paro Nacional',
'PAROS Y PROTESTAS',
'EL PARO NACIONAL')~1,
main_problem2 %in% c('EL PARO NACIONAL',
'PAROS Y PROTESTAS')~1,
TRUE ~ 0),
# main_problem_descriptive: collapses main_problem into English category
# labels; any value not listed below becomes NA.
# NOTE(review): 'PAROS Y PROTESTAS' and 'EL PARO NACIONAL' count as protests in
# main_problem_protest but are not mapped to 'Protests' here, so they fall
# through to NA — confirm this is intended.
main_problem_descriptive = case_when(main_problem %in% c('Protestas sectoriales',
'Dificultades de orden público durante el Paro Nacional') ~ 'Protests',
main_problem == 'Trámites y restricciones' ~ 'Regulations',
main_problem %in% c('Pandemia Covid-19',
'Dificultades por pandemia de COVID-19',
'Medidas de confinamiento y restricciones por la pandemia de COVID-19') ~ 'COVID-19',
main_problem == 'Elevada carga tributaria' ~ 'Taxes',
main_problem == 'Contrabando' ~ 'Smuggling',
main_problem == 'Cartera vencida' ~ 'Overdue portfolio',
main_problem %in% c('Falta de financiación',
'Falta de capital de trabajo') ~ 'Capital/Credit',
main_problem == 'Escasez de trabajadores calificados' ~ 'Labor market',
main_problem %in% c('Alto costo de energía',
'Dificultades para el abastecimiento de materias primas o insumos',
'Escasez o alto costo de materias primas e insumos',
'Incremento en costos de insumos y materias primas',
'Alto costo de los insumos utilizados') ~ 'Production costs',
main_problem == 'Problemas climáticos' ~ 'Weather conditions',
main_problem == 'Tasa de cambio' ~ 'Exchange rate',
main_problem %in% c('Elevada competencia',
'Competencia desleal') ~ 'Competition',
main_problem == 'Falta de demanda' ~ 'Market demand',
main_problem %in% c('Ninguno',
'Ningun problema',
'NINGÚN PROBLEMA') ~ 'None',
main_problem == 'No sabe/No responde' ~ 'DK/DA',
main_problem == 'Otro' ~ 'Other',
TRUE ~ NA_character_),
# decreased_workers: 1 when variation_workers is 'Disminuyó', else 0
# (NA stays NA under ifelse).
decreased_workers=ifelse(variation_workers=='Disminuyó', 1,0),
# invested_fix_capital: 'Si'/'Sí' -> 1, 'No' -> 0, anything else -> NA.
invested_fix_capital=case_when(invested_fix_capital=='Si'~1,
invested_fix_capital=='Sí'~1,
invested_fix_capital=='No'~0,
TRUE ~ NA_real_),
# Expectation indicators: 1 when the answer is 'Aumentará', else 0.
expected_increase_workers=ifelse(expected_increase_workers=='Aumentará', 1,0),
expected_increase_sales=ifelse(expected_increase_sales=='Aumentará', 1,0),
# sector is stored as a factor; sector_dummy is 1 when sector is 'Servicios'.
sector=as.factor(sector),
sector_dummy=ifelse(sector=='Servicios', 1,0))]
# 0/1 indicators for four of the collapsed problem categories. Rows whose
# main_problem_descriptive is NA get NA in every indicator.
dt[, c('main_problem_taxes',
       'main_problem_demand',
       'main_problem_competition',
       'main_problem_covid') := lapply(
  c('Taxes', 'Market demand', 'Competition', 'COVID-19'),
  function(category) as.numeric(main_problem_descriptive == category)
)]
# t numbers the survey periods consecutively: 2014-I is 1, 2014-II is 2, and
# so on up to 2024-I, which is 21. Any other year/semester combination
# (including 2024-II and missing values) gets NA.
# municipality is the constant 'Cali' on every row.
dt[, `:=`(
  t = {
    half_year_index <- (year - 2014) * 2 + match(semester, c('I', 'II'))
    fifelse(year >= 2014 & half_year_index <= 21, half_year_index, NA_real_)
  },
  municipality = 'Cali'
)]
# event: factor flagging periods with t above 15 (1) versus the rest (0).
# clusters: sector_dummy and t joined by an underscore.
dt[, `:=`(
  event = factor(as.numeric(t > 15)),
  #post_event=ifelse(t==16|t==17, 1,0) %>% as.factor(),
  clusters = paste0(sector_dummy, '_', t)
)]
# Row counts per event level.
table(dt$event)
# Share of each collapsed problem category (missing values are left out by
# table()), rounded to three decimals.
proportions <- round(prop.table(table(dt$main_problem_descriptive)), 3)
# Category names ordered from the smallest share to the largest.
ordered_levels <- names(sort(proportions, decreasing = FALSE))
# Store the variable as a factor whose levels follow that ordering.
dt$main_problem_descriptive <- factor(
  dt$main_problem_descriptive,
  levels = ordered_levels
)
## decipher firm ID
# id_test keeps the characters of id from position 6 to the end.
dt[, id_test := substr(as.character(id), 6, nchar(as.character(id)))] # doesn't work
## remove last 4 rows
# head(dt, -4) drops the final four rows and returns an empty table when dt
# has four rows or fewer. The previous 1:(.N - 4) index counted backwards in
# that case, giving c(1, 0) or a mix of positive and negative indices.
dt <- head(dt, -4)
# tests
# Linear model of expected_increase_workers on each main-problem indicator
# interacted with event, plus year factors, investment and workforce-decrease
# terms; robust standard errors clustered on `clusters`.
ccc_reg <- lm_robust(
  expected_increase_workers ~ main_problem_protest*event
  + main_problem_covid*event + main_problem_taxes*event
  + main_problem_competition*event + main_problem_demand*event
  + as.factor(year) + invested_fix_capital*event
  #+ expected_increase_sales*event
  + decreased_workers*event,
  data = dt,
  clusters = clusters
)

# Keep the main-problem coefficients. Terms containing 'event' (the
# interactions) are labelled "(Post)", the remaining ones "(Pre)", and each
# term gets a short display name.
ccc_plot <- tidy(ccc_reg) %>%
  filter(grepl('main_problem', term)) %>%
  mutate(
    t = factor(
      ifelse(
        grepl('event', term),
        'Likelihood of Increasing Number of Employees (Post)',
        'Likelihood of Increasing Number of Employees (Pre)'
      ),
      levels = c('Likelihood of Increasing Number of Employees (Pre)',
                 'Likelihood of Increasing Number of Employees (Post)')
    ),
    variables = case_when(
      grepl('protest', term) ~ 'Protests',
      grepl('covid', term) ~ 'COVID-19',
      grepl('taxes', term) ~ 'Taxes',
      grepl('demand', term) ~ 'Demand',
      grepl('competition', term) ~ 'Competition',
      TRUE ~ NA_character_
    )
  )

# Coefficient plot with confidence intervals, one panel per period label.
ggplot(ccc_plot, aes(x = variables, y = estimate)) +
  geom_point() +
  geom_errorbar(aes(ymin = conf.low, ymax = conf.high), width = 0.1) +
  geom_hline(yintercept = 0, color = "black", linetype = "dashed") +
  labs(x = '', y = 'Coefficient') +
  coord_flip() +
  facet_grid(~t) +
  theme_bw()
# tests
# Same specification as the fuller model, but without the workforce-decrease
# interaction: main-problem indicators interacted with event, year factors and
# the investment interaction; robust standard errors clustered on `clusters`.
ccc_reg <- lm_robust(
  expected_increase_workers ~ main_problem_protest*event
  + main_problem_covid*event + main_problem_taxes*event
  + main_problem_competition*event + main_problem_demand*event
  + as.factor(year) + invested_fix_capital*event,
  #+ expected_increase_sales*event
  #+ decreased_workers*event,
  data = dt,
  clusters = clusters
)

# Keep the main-problem coefficients. Terms containing 'event' (the
# interactions) are labelled "(Post)", the remaining ones "(Pre)", and each
# term gets a short display name.
ccc_plot <- tidy(ccc_reg) %>%
  filter(grepl('main_problem', term)) %>%
  mutate(
    t = factor(
      ifelse(
        grepl('event', term),
        'Likelihood of Increasing Number of Employees (Post)',
        'Likelihood of Increasing Number of Employees (Pre)'
      ),
      levels = c('Likelihood of Increasing Number of Employees (Pre)',
                 'Likelihood of Increasing Number of Employees (Post)')
    ),
    variables = case_when(
      grepl('protest', term) ~ 'Protests',
      grepl('covid', term) ~ 'COVID-19',
      grepl('taxes', term) ~ 'Taxes',
      grepl('demand', term) ~ 'Demand',
      grepl('competition', term) ~ 'Competition',
      TRUE ~ NA_character_
    )
  )

# Coefficient plot with confidence intervals, one panel per period label.
ggplot(ccc_plot, aes(x = variables, y = estimate)) +
  geom_point() +
  geom_errorbar(aes(ymin = conf.low, ymax = conf.high), width = 0.1) +
  geom_hline(yintercept = 0, color = "black", linetype = "dashed") +
  labs(x = '', y = 'Coefficient') +
  coord_flip() +
  facet_grid(~t) +
  theme_bw()
# Switch to the paper's working directory (the same call is recorded twice in
# the session history).
setwd("~/Dropbox/Academia/Migration Waves/Colombia COVID/Paper")
setwd("~/Dropbox/Academia/Migration Waves/Colombia COVID/Paper")
# Sample sizes from 25 to 495 in steps of 10, and the SD used to scale the
# effect size below.
n_seq <- seq(25, 500, by = 10)
sd_data <- .5
# For each n, solve for the effect size d at a 5% significance level and 80%
# power, then multiply it by sd_data.
# NOTE(review): pwr.t.test is not base R (presumably from the 'pwr' package)
# and no library(pwr) call is visible in this file — confirm it is attached.
eff_detect <- map_dbl(n_seq, ~pwr.t.test(n = .x, sig.level = 0.05, power = .8)$d*sd_data)
# Help lookups for map_dbl.
?map_dbl
??map_dbl
# Further working-directory changes recorded in the session: the Dropbox path
# twice more, then a different absolute path, then the "Paper_Inputs"
# directory relative to whichever directory is current at that point.
setwd("~/Dropbox/Academia/Migration Waves/Colombia COVID/Paper")
setwd("~/Dropbox/Academia/Migration Waves/Colombia COVID/Paper")
setwd("~/Dartmouth College Dropbox/Yang-Yang Zhou/Research Projects/Migration Waves/Colombia COVID/Paper")
setwd("Paper_Inputs")
## Load data and functions
library(estimatr, quietly = TRUE)
library(foreign, quietly = TRUE)
library(ggplot2, quietly = TRUE)
library(tidyverse, quietly = TRUE)
library(reshape2, quietly = TRUE)
library(gtools, quietly = TRUE)
library(stringr, quietly = TRUE)
library(patchwork, quietly = TRUE)
library(knitr, quietly = TRUE)
library(kableExtra, quietly = TRUE)
library(CBPS, quietly = TRUE)
#library(sp, quietly = TRUE) #GIS packages
#library(spdep, quietly = TRUE)
#library(rgdal, quietly = TRUE)
#library(maps, quietly = TRUE)
#library(mapdata, quietly = TRUE)
#library(ggmap, quietly = TRUE)
library(lfe, quietly = TRUE)
library(foreach, quietly = TRUE)
library(data.table, quietly = TRUE)
library(equivtest, quietly = TRUE) # equivalence analysis
# Plot theme shared by the figures: blank panel background and legend title,
# size-10 plot title, thin gray70 panel border, no legend, and size-11 x-axis
# text. Takes no arguments and returns the value of ggplot2's theme().
yy_theme <- function() {
  blank <- element_blank()
  thin_border <- element_rect(colour = "gray70", fill = NA, size = .11)
  x_axis_text <- element_text(angle = 0, vjust = 0, hjust = .4, size = 11)

  theme(
    panel.background = blank,
    legend.title = blank,
    plot.title = element_text(size = 10),
    panel.border = thin_border,
    legend.position = "none",
    axis.text.x = x_axis_text
  )
}
# load 2019 data
df_col <- read_csv("colombia_clean.csv")

# dir_contact_bi is 1 when dir_contact_index is above 1, 0 when it is 1 or
# below, and NA when missing. Each listed variable also gets a `<name>_res`
# copy rescaled to min 0 / max 1 for the observational regression analysis.
df_col <- df_col %>%
  mutate(
    dir_contact_bi = case_when(
      dir_contact_index > 1 ~ 1,
      dir_contact_index <= 1 ~ 0,
      TRUE ~ NA_real_
    ),
    across(
      all_of(c("open_index",
               "partisanship",
               "skilled_labor",
               "contract",
               "salary",
               "benefits_index",
               "dir_contact_index",
               "indir_contact_index",
               "cultural_index")),
      scales::rescale,
      .names = "{.col}_res"
    )
  )

# City-specific subsets.
df_col_cali <- df_col[df_col$city == "Cali", ]
df_col_cucuta <- df_col[df_col$city == "Cúcuta", ]
# load 2021 panel data
# Wide-format panel file; the two openness indices (suffixes _b and _e) are
# each rescaled on read.
col_wide <- read_csv("col_wide.csv") %>% mutate(open_index_4q_b = scales::rescale(open_index_4q_b),
# rescale openness to be between 0 and 1
open_index_4q_e = scales::rescale(open_index_4q_e))
# Long-format panel file with its openness index rescaled the same way.
col_long <- read_csv("col_long.csv") %>% mutate(open_index_4q = scales::rescale(open_index_4q))
# Every row of col_wide is flagged as recontacted (numeric flag and text label).
col_wide$recontacted <- 1
col_wide$recontacted2 <- "Recontacted"
# Derived variables on the long file:
# - period: factor with "baseline" first, then "endline".
# - period_x_treatment: covid_first on endline rows, 0 on other rows.
# - religion_* / race_mestizo: 1 for the stated code, 0 for any other
#   non-missing code, NA when the source variable is missing.
# - cov_selffamily: 1 if cov_self or cov_family is 1, 0 if both are 0, else NA.
# - ven_friends_bi: 1 if ven_friends is above 0, 0 if it is 0, else NA.
# - migQ_first: covid_first with 0 and 1 swapped.
# - cov_natgov_bi / cov_locgov_bi: 1 for values of 3 or more, 0 below 3,
#   NA when missing.
col_long <- col_long %>%
mutate(period = fct_relevel(period, "baseline", "endline"),
period_x_treatment = (period == "endline") * covid_first,
#city_num = case_when(city_num == 1~1, city_num == 2~0, TRUE~NA_real_),
religion_cath = case_when(religion2 == 1~1, !is.na(religion2)~0, TRUE~NA_real_),
religion_evan = case_when(religion2 == 2~1, !is.na(religion2)~0, TRUE~NA_real_),
religion_other = case_when(religion2 == 3~1, !is.na(religion2)~0, TRUE~NA_real_),
race_mestizo = case_when(race == 2~1, !is.na(race)~0, TRUE~NA_real_),
cov_selffamily = case_when((cov_self == 1 | cov_family == 1)~1,
(cov_self == 0 & cov_family == 0)~0,
TRUE~NA_real_),
ven_friends_bi = case_when(ven_friends > 0~1, ven_friends == 0~0, TRUE~NA_real_),
migQ_first = case_when(covid_first == 0~1, covid_first == 1~0,  TRUE~NA_real_),
cov_natgov_bi = case_when(cov_natgov >= 3~1, cov_natgov <3~0, TRUE~NA_real_),
cov_locgov_bi = case_when(cov_locgov >= 3~1, cov_locgov <3~0, TRUE~NA_real_)
)
# attrited group
# Rows of df_col selected by comparing the upper-cased name against
# col_wide$name with %notin% (presumably: names that do not appear there).
# NOTE(review): %notin% is not defined in the visible part of this file; it
# presumably comes from an attached package or a helper script — confirm.
df_col_attrit <- df_col[which(toupper(df_col$name) %notin% col_wide$name),]
# Outcome variable names, in display order.
outcomes <- c(
  "mig_gen_country",
  "mig_gen_border",
  "mig_gen_city",
  "mig_gen_oth_country"
)

# Display label for each outcome, keyed by the outcome's variable name.
outcomes_names <- setNames(
  c(
    "Not too many Vens",
    "Border should be open",
    "Our city should host Vens",
    "Right to seek asylum"
  ),
  outcomes
)

## Controls
controls <- c(
  "age", "male", "city_num", "race_mestizo", "education", "kids",
  "marriage", "religion_cath", "religion_evan", "religiosity",
  "wealth_index"
)
# Create big dataset
# Stack three groups, each reduced to the outcomes, the controls, cov_lostjob
# and covid_first, and each tagged with a survey_group label: endline rows of
# the panel, the rows of col_new, and the rows of col_cross. kids is coerced
# to numeric in the last two groups.
col_endline <- bind_rows(
  col_long %>%
    filter(period == "endline") %>%
    dplyr::select(all_of(c(outcomes, controls)), cov_lostjob, covid_first) %>%
    mutate(survey_group = "Panelists (endline)"),
  col_new %>%
    dplyr::select(all_of(c(outcomes, controls)), cov_lostjob, covid_first) %>%
    mutate(
      survey_group = "New Cross-Sectional Respondents",
      kids = as.numeric(kids)
    ),
  col_cross %>%
    dplyr::select(all_of(c(outcomes, controls)), cov_lostjob, covid_first) %>%
    mutate(
      survey_group = "All Respondents",
      kids = as.numeric(kids)
    )
)
# Interactive inspection of the wide panel data.
View(col_wide)
# Print the baseline hist_culture values. The earlier `cor_wide$...` lookup
# was a typo for `col_wide` and failed because no object of that name exists.
col_wide$hist_culture_b
# Correlation test between hist_culture_b and mig_gen_border_b: print the full
# test, then only the estimated correlation.
# The estimate is stored in the `estimate` element. `$cor` returns NULL on an
# htest object, and `$est` only works through partial name matching.
hist_border_test <- cor.test(col_wide$hist_culture_b, col_wide$mig_gen_border_b)
hist_border_test
hist_border_test$estimate
# Race distribution in the panel data and in the 2019 data.
table(col_wide$race)
table(df_col$race)
